## Warning: Missing column names filled in: 'X1' [1]
# Linear regression of HIV diagnoses on borough, gender, age group and
# income, followed by the standard coefficient summary.
income_hiv %>%
  # Keep only rows whose year is not "2011" and whose age is not the "All"
  # level (presumably an aggregate category -- confirm against the data).
  filter(year != "2011", age != "All") %>%
  lm(
    hiv_diagnoses ~ borough + gender + age + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + age + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.106  -3.702  -1.040   2.239  50.426 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           9.835e-01  3.024e-01   3.252  0.00115 ** 
## boroughBrooklyn       2.975e-01  2.807e-01   1.060  0.28922    
## boroughManhattan      3.091e+00  3.313e-01   9.332  < 2e-16 ***
## boroughQueens        -1.245e+00  2.588e-01  -4.811 1.53e-06 ***
## boroughStaten Island -4.376e+00  3.972e-01 -11.016  < 2e-16 ***
## genderMale            6.083e+00  1.515e-01  40.138  < 2e-16 ***
## age20 - 29            9.600e+00  2.625e-01  36.576  < 2e-16 ***
## age30 - 39            6.870e+00  2.625e-01  26.175  < 2e-16 ***
## age40 - 49            4.627e+00  2.625e-01  17.627  < 2e-16 ***
## age50 - 59            2.355e+00  2.625e-01   8.972  < 2e-16 ***
## age60+                4.267e-01  2.625e-01   1.626  0.10406    
## mid_income           -1.238e-04  6.938e-06 -17.851  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.682 on 7764 degrees of freedom
## Multiple R-squared:  0.3594, Adjusted R-squared:  0.3585 
## F-statistic: 395.9 on 11 and 7764 DF,  p-value: < 2.2e-16
# Linear regression of HIV diagnoses on borough, gender, race and income,
# followed by the standard coefficient summary.
income_hiv %>%
  # Keep only rows whose year is not "2011" and whose race is not the "All"
  # level (presumably an aggregate category -- confirm against the data).
  filter(year != "2011", race != "All") %>%
  lm(
    hiv_diagnoses ~ borough + gender + race + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + race + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.319  -5.652  -1.628   2.949  84.026 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.514e+00  5.142e-01   2.945  0.00324 ** 
## boroughBrooklyn       3.570e-01  4.929e-01   0.724  0.46898    
## boroughManhattan      3.710e+00  5.818e-01   6.376 1.95e-10 ***
## boroughQueens        -1.494e+00  4.545e-01  -3.287  0.00102 ** 
## boroughStaten Island -5.251e+00  6.976e-01  -7.527 5.90e-14 ***
## genderMale            7.299e+00  2.662e-01  27.425  < 2e-16 ***
## raceBlack             1.093e+01  4.208e-01  25.978  < 2e-16 ***
## raceLatino/Hispanic   9.027e+00  4.208e-01  21.451  < 2e-16 ***
## raceOther/Unknown    -1.380e+00  4.208e-01  -3.278  0.00105 ** 
## raceWhite             3.628e+00  4.208e-01   8.621  < 2e-16 ***
## mid_income           -1.486e-04  1.218e-05 -12.197  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.71 on 6469 degrees of freedom
## Multiple R-squared:  0.2699, Adjusted R-squared:  0.2687 
## F-statistic: 239.1 on 10 and 6469 DF,  p-value: < 2.2e-16
# Interactive scatter plot with one point per (uhf, year) group:
# x = median of mid_income, y = mean of hiv_diagnoses, coloured by year.
# Rows with year "2011" are excluded before aggregating.
income_plot <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  # NOTE(review): despite its name, `sum_hiv` holds the group *mean* of
  # hiv_diagnoses, not a sum. The name is kept so the axis and hover labels
  # stay as they were -- confirm which statistic was intended.
  summarise(sum_hiv = mean(hiv_diagnoses), mid_in = median(mid_income)) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # plotted table carries no leftover grouping.
  ungroup() %>%
  ggplot(aes(x = mid_in, y = sum_hiv, color = year)) +
  geom_point() +
  theme_bw() +
  # The documented value for hiding the legend is lowercase "none"; the
  # capitalised "None" is not a recognised legend position.
  theme(legend.position = "none")
ggplotly(income_plot)

Income distribution across different neighborhoods

# Interactive plot of mid_income against uhf: one semi-transparent point per
# row of income_hiv, with the axes flipped so uhf runs along the vertical
# axis.
income_dist <- ggplot(income_hiv, aes(x = uhf, y = mid_income)) +
  geom_point(alpha = 0.1) +
  coord_flip() +
  theme_bw()
ggplotly(income_dist)